# Keep only the sessions that are actual races.
race_sessions <- session_data %>%
  dplyr::filter(session_name == "Race")
# Precautionary de-duplication: keep the first row seen for each session_key.
# With the data in its current state this removes nothing.
race_sessions <- race_sessions[!duplicated(race_sessions[["session_key"]]), ]
Clean the driver data into a usable state before it is used.
# Normalise the driver table:
#   * rename the "RB" team to "Racing Bulls";
#   * turn team_colour into a "#"-prefixed colour string;
#   * give every row of a team the same colour.
driver_names <- driver_names %>%
  mutate(
    team_name = ifelse(team_name == "RB", "Racing Bulls", team_name),
    # sub() leaves NA as NA (paste0() would produce the invalid colour "#NA")
    # and does not add a second "#" if this chunk is run more than once.
    team_colour = sub("^#?", "#", team_colour)
  ) %>%
  group_by(team_name) %>%
  # Use the first non-missing colour of the team; a team with no colour at
  # all stays NA.
  mutate(team_colour = first(team_colour[!is.na(team_colour)])) %>%
  ungroup()

# Driver rows that belong to one of the race sessions.
race_drivers_all <- driver_names %>%
  dplyr::filter(session_key %in% race_sessions$session_key)

# Slimmed-down driver table without the columns dropped below.
limited_drivers <- race_drivers_all %>%
  select(-meeting_key, -broadcast_name, -first_name, -last_name)
In this section I clean `pitstop_data$pit_duration` by removing NA values and outliers.

### Filtering Down to Race Pitstops
# Restrict the pit stops to race sessions, attach the driver/team columns from
# limited_drivers, and drop stops that have no recorded duration.
pitstop_data <- pitstop_data %>%
  filter(session_key %in% race_sessions$session_key) %>%
  left_join(limited_drivers, by = c("session_key", "driver_number")) %>%
  filter(!is.na(pit_duration))

# Distribution of the durations before any upper cut-off is applied.
summary(pitstop_data$pit_duration)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.8 21.8 23.4 103.0 26.8 2485.9
# Hard upper cut-off: keep only stops with a pit_duration of at most 150
# (the summary above shows a maximum of 2485.9 before this cut).
pitstop_data <- pitstop_data[pitstop_data[["pit_duration"]] <= 150, ]

# Quartiles, inter-quartile range and median of the remaining durations.
pitstop_duration_Q1 <- quantile(
  pitstop_data[["pit_duration"]],
  probs = 0.25,
  na.rm = TRUE
)
pitstop_duration_Q3 <- quantile(
  pitstop_data[["pit_duration"]],
  probs = 0.75,
  na.rm = TRUE
)
# Could also be computed with IQR(pitstop_data$pit_duration, na.rm = TRUE).
pitstop_iqr <- pitstop_duration_Q3 - pitstop_duration_Q1
pitstop_median <- median(pitstop_data[["pit_duration"]], na.rm = TRUE)

# Distribution after the cut-off.
summary(pitstop_data[["pit_duration"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.80 21.70 23.30 24.28 25.30 93.20

# Tukey-fence (1.5 * IQR) outlier filter, currently disabled:
# pitstop_data <- pitstop_data %>%
# filter(pit_duration >= (pitstop_duration_Q1 - 1.5 * pitstop_iqr) &
# pit_duration <= (pitstop_duration_Q3 + 1.5 * pitstop_iqr))
Using the results I have previously gathered, this section contains several plots created from the aggregated data points.
# Horizontal box plot of pit stop duration per team, teams ordered by their
# median duration and filled with the team's own colour.
TeamBoxplot <- ggplot(
  pitstop_data,
  aes(
    x = reorder(team_name, pit_duration, median),
    y = pit_duration,
    fill = team_colour
  )
) +
  # Boxes without outlier markers; every stop is drawn by the jitter layer.
  geom_boxplot(outlier.shape = NA, alpha = 0.6) +
  geom_jitter(colour = "black", alpha = 0.3, width = 0.2) +
  # team_colour already holds colour values, so use them as-is.
  scale_fill_identity() +
  # Limit the duration axis to the 2nd-97th percentile range; oob_keep leaves
  # out-of-range values in the data instead of replacing them with NA.
  scale_y_continuous(
    limits = quantile(pitstop_data$pit_duration, c(0.02, 0.97), na.rm = TRUE),
    oob = scales::oob_keep
  ) +
  coord_flip() +
  labs(
    title = "Pit Stop Duration by Team",
    x = "Team",
    y = "Pit Stop Duration (s)"
  ) +
  theme_bw() +
  theme(legend.position = "none")

# Interactive version of the plot.
TeamBoxplot %>%
  ggplotly(width = 1200, height = 700) %>%
  layout(autosize = TRUE)
# Bar chart of the Mean column of pitstop_average per driver_label, bars
# ordered by ascending Mean and filled with the colour held in team_colour.
AveragesGraph <- ggplot(
  pitstop_average,
  aes(x = reorder(driver_label, Mean), y = Mean, fill = team_colour)
) +
  geom_col() +
  scale_fill_identity() +
  labs(x = "Name Acronyms", y = "Mean (s)", title = "Averages Plot") +
  # Rotate only the x-axis tick labels. Setting `text` here would rotate every
  # text element of the plot, including the title and both axis titles.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Interactive version of the plot.
AveragesGraph %>%
  ggplotly(width = 1200, height = 700) %>%
  layout(autosize = TRUE)